###############################################################################
#######################     Patent Related Analyses   #########################
###############################################################################

library(data.table)
library(dtplyr)
library(tidyverse)
library(fixest)
library(readstata13)
library(MatchIt)
library(CBPS)

setwd("YOUR WORKING DIRECTORY")

### LOAD IN DATA FOR ANALYSIS
read_stata <- function(x) read.dta13(x) %>% as_tibble()

# Counts of number female/male authors on a patent -- used to create if a 
# team is majority female or majority male
inventor_gender_counts <- read_stata("clean_data/patent_data/inventor_gender_counts.dta")

# Data generated using 2019 MeSH terms and 2,500 characters from patent
patent_gbd <- read_stata("clean_data/patent_data/patent_gbd_level.dta")
patent_mesh_raw <- read_stata("clean_data/patent_data/patent_mesh_raw.dta")
patent_mesh_and_tree <- read_stata("clean_data/patent_data/patent_mesh_and_tree.dta")

################################################################################
###                     Patent-level regressions
################################################################################

pat_reg_data <- 
  patent_gbd %>% 
  left_join(inventor_gender_counts) %>%
  group_by(patent_id) %>% 
  filter(row_number() == 1) %>% 
  ungroup() %>% 
  mutate(
    min_f = (female_count > 0) & (male_count > female_count),
    maj_f = (female_count >= male_count) & (male_count > 0),
    all_f = (female_count > 0) & (male_count == 0),
    all_male = female_count == 0 & male_count > 0,
    pf100 = 100*patent_female,
    pm100 = 100*patent_male, 
    yr = patent_year,
    sc = subcategory_id,
    sz = pat_team_sz
  ) %>% 
  filter(
    !is.na(female_count), !is.na(male_count)
  ) %>% 
  mutate(
    asg_company = ifelse(is.na(asg_company), 0, asg_company)
  )

## Results hold no matter how you do all_female 
m_all <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
               pat_reg_data)

m_decade1 <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
                   pat_reg_data %>% filter(patent_year < 1990))

m_decade2 <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
                   pat_reg_data %>% filter(patent_year > 1989 & patent_year < 2000))

m_decade3 <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
                   pat_reg_data %>% filter(patent_year > 1999))

m_drugs <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
                 pat_reg_data %>% filter(subcategory_id == 31))

m_surgery <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
                   pat_reg_data %>% filter(subcategory_id == 32))

m_uni <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
               pat_reg_data %>% filter(asg_company != 1))

m_corp <- feols(pf100 ~ min_f + maj_f + all_f | yr^sc + yr^sz, 
                pat_reg_data %>% filter(asg_company == 1))

################################################################################
###                     MATCHING ANALYSIS: EXACT MATCHING 
################################################################################

# Construct the Disease Area MeSH term Fixed Effect Variable
pat_disease_fe <- 
  patent_mesh_and_tree %>%  
  # Only keep the disease area MeSH tree numbers with at least 2-level depth
  filter(mesh_tree_num != "", str_sub(mesh_tree_num,1,1) == "C", str_count(mesh_tree_num, "\\.") >  2) %>% 
  group_by(patent_id, mesh_term) %>% 
  slice_head(n=1) %>% 
  ungroup() %>% 
  mutate(
    # convert the tree number to unique integers
    mesh_tree = as.numeric(as.factor(mesh_term)),
    mesh_rank = patent_mesh_rank
  ) %>% 
  dplyr::select(patent_id, mesh_tree, mesh_rank) %>% 
  arrange(patent_id, mesh_rank) %>% 
  group_by(patent_id) %>% 
  # keep the top 10 disease area tree number related to an article
  slice_head(n=10) %>% 
  mutate(m = row_number(mesh_rank)) %>% 
  ungroup() %>% 
  arrange(patent_id, mesh_rank) %>% 
  dplyr::select(patent_id, mesh_tree, m) %>% 
  spread(m, mesh_tree, sep="_", fill=0) 


# Final dataset used for exact matchings
pat_mesh_reg_data <- 
  pat_reg_data %>% 
  left_join(pat_disease_fe) %>% 
  filter(m_1 != 0, !is.na(m_1))


#Year-Subcategory-Team Size-Disease Area Matching
match_teams <- function(x, data) {
  matchme <- 
    data %>% 
    mutate(treat = !!sym(x)) %>% 
    filter(all_male | !!sym(x), !is.na(m_1)) %>% 
    dplyr::select(patent_id, pf100, pm100, treat, !!sym(x), yr, sc, sz, m_1)
  
  mout <- matchit(treat ~ yr + sc + m_1 + sz, 
                  data=matchme, 
                  method="exact")
  
  mdata <- match.data(mout) %>% as_tibble()
  mdata
}

dm_all_all_f <- match_teams('all_f', pat_mesh_reg_data)
dm_decade1_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(patent_year < 1990))
dm_decade2_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(patent_year > 1989 & patent_year < 2000))
dm_decade3_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(patent_year > 1999))
dm_drugs_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(subcategory_id == 31))
dm_surgery_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(subcategory_id == 32))
dm_uni_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(asg_company != 1))
dm_corp_all_f <- match_teams('all_f', pat_mesh_reg_data %>% filter(asg_company == 1))

dm_all_maj_f <- match_teams('maj_f', pat_mesh_reg_data)
dm_decade1_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(patent_year < 1990))
dm_decade2_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(patent_year > 1989 & patent_year < 2000))
dm_decade3_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(patent_year > 1999))
dm_drugs_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(subcategory_id == 31))
dm_surgery_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(subcategory_id == 32))
dm_uni_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(asg_company != 1))
dm_corp_maj_f <- match_teams('maj_f', pat_mesh_reg_data %>% filter(asg_company == 1))

dm_all_min_f <- match_teams('min_f', pat_mesh_reg_data)
dm_decade1_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(patent_year < 1990))
dm_decade2_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(patent_year > 1989 & patent_year < 2000))
dm_decade3_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(patent_year < 1999))
dm_drugs_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(subcategory_id == 31))
dm_surgery_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(subcategory_id == 32))
dm_uni_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(asg_company != 1))
dm_corp_min_f <- match_teams('min_f', pat_mesh_reg_data %>% filter(asg_company == 1))


mm_all_all_f <- feols(pf100 ~ all_f | subclass, dm_all_all_f, weights = ~weights)
mm_decade1_all_f <- feols(pf100 ~ all_f | subclass, dm_decade1_all_f, weights = ~weights)
mm_decade2_all_f <- feols(pf100 ~ all_f | subclass, dm_decade2_all_f, weights = ~weights)
mm_decade3_all_f <- feols(pf100 ~ all_f | subclass, dm_decade3_all_f, weights = ~weights)
mm_drugs_all_f <- feols(pf100 ~ all_f | subclass, dm_drugs_all_f, weights = ~weights)
mm_surgery_all_f <- feols(pf100 ~ all_f | subclass, dm_surgery_all_f, weights = ~weights)
mm_uni_all_f <- feols(pf100 ~ all_f | subclass, dm_uni_all_f, weights = ~weights)
mm_corp_all_f <- feols(pf100 ~ all_f | subclass, dm_corp_all_f, weights = ~weights)

mm_all_maj_f <- feols(pf100 ~ maj_f | subclass, dm_all_maj_f, weights = ~weights)
mm_decade1_maj_f <- feols(pf100 ~ maj_f | subclass, dm_decade1_maj_f, weights = ~weights)
mm_decade2_maj_f <- feols(pf100 ~ maj_f | subclass, dm_decade2_maj_f, weights = ~weights)
mm_decade3_maj_f <- feols(pf100 ~ maj_f | subclass, dm_decade3_maj_f, weights = ~weights)
mm_drugs_maj_f <- feols(pf100 ~ maj_f | subclass, dm_drugs_maj_f, weights = ~weights)
mm_surgery_maj_f <- feols(pf100 ~ maj_f | subclass, dm_surgery_maj_f, weights = ~weights)
mm_uni_maj_f <- feols(pf100 ~ maj_f | subclass, dm_uni_maj_f, weights = ~weights)
mm_corp_maj_f <- feols(pf100 ~ maj_f | subclass, dm_corp_maj_f, weights = ~weights)

mm_all_min_f <- feols(pf100 ~ min_f | subclass, dm_all_min_f, weights = ~weights)
mm_decade1_min_f <- feols(pf100 ~ min_f | subclass, dm_decade1_min_f, weights = ~weights)
mm_decade2_min_f <- feols(pf100 ~ min_f | subclass, dm_decade2_min_f, weights = ~weights)
mm_decade3_min_f <- feols(pf100 ~ min_f | subclass, dm_decade3_min_f, weights = ~weights)
mm_drugs_min_f <- feols(pf100 ~ min_f | subclass, dm_drugs_min_f, weights = ~weights)
mm_surgery_min_f <- feols(pf100 ~ min_f | subclass, dm_surgery_min_f, weights = ~weights)
mm_uni_min_f <- feols(pf100 ~ min_f | subclass, dm_uni_min_f, weights = ~weights)
mm_corp_min_f <- feols(pf100 ~ min_f | subclass, dm_corp_min_f, weights = ~weights)

etable(
  mm_all_all_f, 
  mm_decade1_all_f, 
  mm_decade2_all_f, 
  mm_decade3_all_f, 
  mm_drugs_all_f,
  mm_surgery_all_f,
  mm_uni_all_f,
  mm_corp_all_f,
  cluster="patent_id")

etable(
  mm_all_maj_f,
  mm_decade1_maj_f,
  mm_decade2_maj_f,
  mm_decade3_maj_f, 
  mm_drugs_maj_f,
  mm_surgery_maj_f,
  mm_uni_maj_f,
  mm_corp_maj_f,
  cluster="patent_id")

etable(
  mm_all_min_f,
  mm_decade1_min_f,
  mm_decade2_min_f,
  mm_decade3_min_f, 
  mm_drugs_min_f,
  mm_surgery_min_f,
  mm_uni_min_f,
  mm_corp_min_f,
  cluster="patent_id")

################################################################################
###                      BUILD FIGUREs 1, 2, adn 3
################################################################################
library(tidyverse)
library(hrbrthemes)
library(ggthemes)
library(ggpubr)
library(scales)
library(ggrepel)

# Set the theme
theme_set(theme_minimal())

# Load in the processed data from the STATA do file build_data_for_figures_123.do
df_counts <- read_csv("clean_data/patent_data/inventor_invention_counts.csv")
df_net_sum <- read_csv("clean_data/patent_data/female_net_sums.csv") %>% 
  mutate(
    net_fm = patent_female100 - patent_male100
  )

# Counts for male/female teams
all_counts <- 
  df_counts %>% 
  group_by(pat_majority_female) %>% 
  summarize(
    patent_female = sum(patent_female),
    patent_male = sum(patent_male),
    patent_total = sum(patent_total)
  )

#= Create an aggregate count across all teams
df_counts_all <- 
  df_counts %>% 
  group_by(patent_year) %>% 
  summarize(
    patent_female = sum(patent_female),
    patent_male = sum(patent_male),
    patent_total = sum(patent_total)
  ) %>% 
  mutate(
    pat_majority_female = 2
  )


## Make Figure 1
df_counts_gender <- 
  df_counts %>% 
  mutate(
    female_relative = (patent_female - patent_male) / patent_male
  ) %>% 
  mutate(
    team = "Female-majority teams",
    team = ifelse(pat_majority_female == 0, "Male-majority teams", team)
  )

all_teams_counts <- 
  df_counts_gender %>% 
  group_by(patent_year) %>% 
  summarize(
    patent_total = sum(patent_total)
  ) %>% 
  mutate(
    team = "All teams"
  )

p_trends <-
  ggplot(data=df_counts_gender %>% select(patent_year, patent_total, team) %>% rbind(all_teams_counts),
         mapping = aes(x = patent_year, y = patent_total, group=team)) +
  geom_line(aes(linetype=team,color=team,size=team)) + 
  scale_color_manual(values=c("#333333", "#333333", "#333333")) +
  scale_linetype_manual(values=c("solid", "longdash", "dotdash")) +
  scale_size_manual(values=c(0.6, 0.45, 0.45)) + 
  scale_y_continuous(name="Number of biomedical patents", labels = comma) +
  xlab("Patent year") + 
  ggtitle("The share of inventions by female-majority teams rises from 6.3% in 1976 to\n16.2% in 2010, still accounting for fewer than 1 in 5 U.S. biomedical patents.") +
  theme(panel.border = element_rect(colour = "#333333", fill=NA, size=0.5),
        axis.text.x=element_text(colour="black"),
        axis.text.y=element_text(colour="black"),
        axis.title.y = element_text(size = 7),
        axis.title.x = element_text(size = 7),
        legend.position = c(0.23, 0.855),
        legend.key.width = grid::unit(2, "lines"),
        legend.title = element_blank(),
        legend.spacing.y = unit(-0.2, 'cm'),
        legend.key.height = unit(0.4, "cm"),
        panel.grid.minor.x = element_blank(),
        legend.text=element_text(size=7),
        axis.ticks = element_line(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        plot.title = element_text(size = 7, hjust = 0.5),
        legend.background = element_rect(fill="white", color="white"),
        legend.margin=margin(c(0,0,0,0))
  ) 


####################  Figure 1   ###################

p_trends 

quartz(width=4.5, height=4)
p_trends
quartz.save(file="tables_figures/figure1.pdf", type="pdf")


## Make Figure 2
df_counts_team_focus <- 
  df_counts %>% 
  rbind(df_counts_all) %>% 
  mutate(
    patent_female = patent_female / patent_total,
    patent_male = patent_male / patent_total
  ) %>% 
  pivot_longer(patent_female:patent_total, names_to = "mft", values_to = "count") %>% 
  mutate(
    female_male = "Female-focused patents",
    female_male = ifelse(mft=="patent_male", "Male-focused patents", female_male),
    team = "(B)\nInventions from female-majority teams",
    team = ifelse(pat_majority_female == 0, "(A)\nInventions from male-majority teams", team),
    team = ifelse(pat_majority_female == 2, "(C)\nAll inventions", team),
  ) %>% 
  filter(mft != "patent_total")


p_count <-
  ggplot(data=df_counts_team_focus %>% filter(pat_majority_female != 2),
         mapping = aes(x = patent_year, y = count, group=female_male)) +
  geom_path(aes(linetype=female_male, size=female_male, color=female_male), linejoin = "mitre", linemitre=1) +
  facet_wrap(vars(team), scales = "free") + 
  # geom_ribbon(aes(ymin = val_min, ymax = val_max, fill = TRUE)) +
  scale_size_manual(values = c(0.6,0.6)) + 
  scale_fill_manual(values=rgb(0,0,0, alpha = 0.1), name="fill") +
  scale_color_manual(values = c("#333333","gray40")) +
  scale_linetype_manual(values=c("solid", "twodash")) +
  scale_y_continuous(name="Percentage of patents", limits=c(0.078, 0.17), breaks=seq(0.08, 0.17, 0.01), labels = scales::percent_format(accuracy = 1, prefix="  "), expand = expansion(mult = c(0, .025))) +
  scale_x_continuous(name="Patent year", limits=c(1975,2010)) +
  ggtitle("Panel A shows that male-majority teams are more likely to invent for men than for women. Panel B shows that female- \nmajority teams are especially likely to invent for women and nearly as likely to invent for men as male-majority teams are.") +
  theme(panel.border = element_rect(colour = "#333333", fill=NA, size=0.5),
        axis.text.x=element_text(colour="black"),
        axis.text.y=element_text(colour="black"),
        axis.title.y = element_text(size = 10),
        axis.title.x = element_text(size = 10),
        legend.position = c(0.15,0.9),
        legend.key.width = grid::unit(2, "lines"),
        legend.title = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.spacing.y = unit(-0.2, 'cm'),
        legend.key.height = unit(0.4, "cm"),
        legend.text=element_text(size=9),
        legend.margin=margin(c(0,0,0,0)),
        axis.ticks = element_line(),
        panel.grid.major = element_blank(),
        # panel.grid.major = element_line(size = 0.4),
        panel.grid.minor = element_blank(),
        plot.title = element_text(size = 9, hjust = 0.5),
        legend.background = element_rect(fill="white", color="white")
  )

####################  Figure 2   ###################

p_count

quartz(width=8, height=4.5)
p_count
quartz.save(file="tables_figures/figure2.pdf", type="pdf")


####################  Figure 3   ###################

df_counts_difference <- 
  df_counts %>% 
  rbind(df_counts_all) %>% 
  mutate(
    female_difference = patent_female - patent_male,
    pos = as.character((female_difference > 0)),
  ) %>% 
  mutate(
    team = "(B)\nInventions from female-majority teams",
    team = ifelse(pat_majority_female == 0, "(A)\nInventions from male-majority teams", team),
    team = ifelse(pat_majority_female == 2, "(C)\nAll inventions", team),
  )


p_difference <-
  ggplot(data=df_counts_difference,
         mapping = aes(x = patent_year, y = female_difference, fill=pos)) +
  facet_wrap(vars(team), scales = "free") + 
  geom_col(width=0.8) +
  xlab("Patent year") + 
  guides(fill = FALSE) +
  scale_fill_manual(values = c("gray40", "#333333")) + 
  scale_y_continuous(name="Count of female-focused minus male-focused patents", limits=c(-250, 150), breaks=seq(-250,150,50), labels = scales::number_format(prefix = " ")) +
  ggtitle("Panel A shows that male inventors have invented more for men than for women, which in turn has resulted in a sizeable gap in the sex focus of invention (Panel C).\nHowever, Panel B shows that, after four decades of progress, the rise in female-focused and -invented patents has shrunk and sometimes reversed that gap.") +
  theme(panel.border = element_rect(colour = "#333333", fill=NA, size=0.5),
        axis.text.x=element_text(colour="black"),
        axis.text.y=element_text(colour="black"),
        axis.title.y = element_text(size = 8),
        axis.title.x = element_text(size = 8),
        legend.position = c(0.3, 0.85),
        legend.key.width = grid::unit(2, "lines"),
        legend.title = element_blank(),
        panel.grid.minor.x = element_blank(),
        legend.text=element_text(size=7),
        axis.ticks = element_line(),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        plot.title = element_text(size = 10, hjust = 0.5),
        legend.background = element_rect(fill="white", color="white")
  )
p_difference

quartz(width=12, height=4.5)
p_difference
quartz.save(file="tables_figures/figure3.pdf", type="pdf")


###############################################################################
####################### Publication Related Analyses ##########################
###############################################################################

library(data.table)
library(dplyr)
library(tidyr)
library(stringr)
library(fixest)
library(MatchIt)
library(modelsummary)

dat <- fread("clean_data/article_data/pubmed2002_reg_meshC_types.csv")

dat <- dat[!is.na(binary_female)]

#Basic regressions
dat$f100 <- dat$binary_female * 100
dat$m100 <- dat$binary_male * 100
dat$year <- as.numeric(as.factor(dat$year))
dat$teamsize <- as.numeric(as.factor(dat$teamsize))
dat$journal_id <- as.numeric(as.factor(dat$journal_id))
dat$all_f <- as.logical(dat$all_f)
dat$all_m <- as.logical(dat$all_m)
dat$min_f <- as.logical(dat$min_f)
dat$maj_f <- as.logical(dat$maj_f)

dat <- as_tibble(dat)

###############################################################################
###                         Building Regressions     
###############################################################################

###############################################################################
###                       JCIF Related Analyses     
###############################################################################

# read in the cleaned jcif data
dat_1k <- fread("clean_data/article_data/jcif_1k_regdat.csv") %>% as_tibble()

# The baseline regression for Fugure 3
fe2_1k <- feols(f100 ~ min_f + maj_f + all_f | year^teamsize + year^journal_id, dat_1k)


###############################################################################
###                            Exact Matching     
###############################################################################

### The publication matchings require at least 50 GB RAM to run, 100 GB RAM is recommended

# matching based on Year-Team Size-Disease
match_teams <- function(x, data) {
  matchme <- 
    data %>% 
    mutate(treat = !!sym(x)) %>% 
    filter(all_m | !!sym(x), !is.na(m_1)) %>% 
    dplyr::select(PMID, f100, treat, !!sym(x), year, teamsize, journal_id, m_1)
  
  mout <- matchit(treat ~ year + m_1 + teamsize, 
                  data=matchme, 
                  method="exact")
  
  mdata <- match.data(mout) %>% as_tibble()
  mdata
}

# prepare the input data
dat_formatch <- dat_1k %>% as.data.table()

dat_formatch <- dat_formatch[!is.na(f100)]
dat_formatch <- as_tibble(dat_formatch)

# export the matched datasets for minority, majority, and all female teams
dm_all_f <- match_teams('all_f', dat_formatch)
fwrite(dm_all_f, "clean_data/article_data/dm_all_f_1k.csv")

dm_maj_f <- match_teams('maj_f', dat_formatch)
fwrite(dm_maj_f, "clean_data/article_data/dm_maj_f_1k.csv")

dm_min_f <- match_teams('min_f', dat_formatch)
fwrite(dm_min_f, "clean_data/article_data/dm_min_f_1k.csv")

#################             Matching Regressions          ###################

## data:
#year-team-mesh
dm_all_f <- as_tibble(fread("clean_data/article_data/dm_all_f_1k.csv")) %>%
  left_join(select(dat, PMID, m100), by = "PMID")

dm_maj_f <- as_tibble(fread("clean_data/article_data/dm_maj_f_1k.csv")) %>%
  left_join(select(dat, PMID, m100), by = "PMID")

dm_min_f <- as_tibble(fread("clean_data/article_data/dm_min_f_1k.csv")) %>%
  left_join(select(dat, PMID, m100), by = "PMID")


#mesh models
mm_all_f <- feols(f100 ~ all_f | year^journal_id + subclass, dm_all_f, weights = ~weights)
mm_maj_f <- feols(f100 ~ maj_f | year^journal_id + subclass, dm_maj_f, weights = ~weights)
mm_min_f <- feols(f100 ~ min_f | year^journal_id + subclass, dm_min_f, weights = ~weights)

#Results in a table
etable(fe2_1k, mm_min_f, mm_maj_f, mm_all_f, cluster = "PMID")


################################################################################
###                       Making the Coeficient Plot
################################################################################

# Plotting functions used (original source: https://github.com/ArielOrtizBobea/spec_chart/blob/master/spec_chart_function.R)
source("build_tables_figures/spec_chart_no_dot.R")

## #figure 2 by sample cuts, 8 graphs
## the function to generate one graph
chart <- function(basic, matchmin, matchmaj, matchall, titletxt = NA, label = T) {
  #basic coef
  coef_basic <- NULL
  for (ests in 1:3) {
    coef_basic <- rbind(coef_basic, get(basic)$coefficients[[ests]])
  }
  
  #match coef
  coef_match <- NULL
  for (reg in c(matchmin, matchmaj, matchall)) {
    coef_match <- rbind(coef_match, get(reg)$coefficients[[1]])
  }
  
  #combined coef
  coef <- rbind(matrix(coef_basic[1]), matrix(coef_match[1]), 
                matrix(coef_basic[2]), matrix(coef_match[2]), 
                matrix(coef_basic[3]), matrix(coef_match[3]))
  
  #basic CI95
  ci_95_basic <- NULL
  for (ests in 1:3) {
    ci_95_basic <- rbind(ci_95_basic, unname(unlist(confint(get(basic))[ests,])))
  }
  
  #match CI95
  ci_95_match <- NULL
  for (reg in c(matchmin, matchmaj, matchall)) {
    ci_95_match <- rbind(ci_95_match, unname(unlist(confint(get(reg))[1,])))
  }
  
  #combined CI95
  ci_95 <- rbind(ci_95_basic[1,], ci_95_match[1,], 
                 ci_95_basic[2,], ci_95_match[2,],
                 ci_95_basic[3,], ci_95_match[3,])
  #basic CI99
  ci_99_basic <- NULL
  for (ests in 1:3) {
    ci_99_basic <- rbind(ci_99_basic, unname(unlist(confint(get(basic), level = 0.99)[ests,])))
  }
  
  #match CI99
  ci_99_match <- NULL
  for (reg in c(matchmin, matchmaj, matchall)) {
    ci_99_match <- rbind(ci_99_match, unname(unlist(confint(get(reg), level = 0.99)[1,])))
  }
  
  #combined CI99
  ci_99 <- rbind(ci_99_basic[1,], ci_99_match[1,], 
                 ci_99_basic[2,], ci_99_match[2,],
                 ci_99_basic[3,], ci_99_match[3,])
  
  #Estimate indicators
  fmin <- matrix(rep(c(T, F), c(2, 4)))
  fmaj <- matrix(rep(c(F, T, F), c(2, 2, 2)))
  fall <- matrix(rep(c(F, T), c(4, 2)))
  vars <- cbind(fmin, fmaj, fall)
  
  #Prepare the final data
  patent_reg <- data.frame(cbind(coef, ci_95, ci_99, vars))
  for (i in 6:8) {
    patent_reg[i] <- as.logical(patent_reg[, i])
  }
  
  colnames(patent_reg) <- c("coef", "ci95_l", "co95_h", "ci99_l", "ci99_h", "fmin",
                            "fmaj", "fall")
  
  patent_reg <- dplyr::select(patent_reg, -c(fmin, fmaj, fall))
  
  #Set labels
  labels <- list("Estimates" = c("Minority Female", "Majority Female", "All Female"))
  
  #plot
  if (label == T) {
    schart_no_dot(patent_reg, n = 2, index.est = 1, index.se = NA, index.ci = 2:5, 
                  ylim = c(-3, 10), adj=c(1,1), offset=c(1.5, 1), ylab="Estimated Effect", 
                  pch.dot=c(21,21,21,21), col.dot=c("black","grey","white","black"),
                  bg.dot=c("black","white","white","black"), lwd.symbol=2, heights = c(1, 1), 
                  highlight=c(2, 4, 6), col.est=c("black","royalblue"), titleTXT = titletxt, 
                  col.est2=c("grey60","lightskyblue"), cex = c(1, 0.8), leftmargin = 12)
    ticks <- seq(-3, 10, 1)
    axis(side=2, at=ticks, las = 2, tck=-0.025)
  } else {
    schart_no_dot(patent_reg, n = 2, index.est = 1, index.se = NA, index.ci = 2:5, 
                  ylim = c(-3, 10), adj=c(1,1), offset=c(1.5, 1), ylab=NA, 
                  pch.dot=c(21,21,21,21), col.dot=c("black","grey","white","black"),
                  bg.dot=c("black","white","white","black"), lwd.symbol=2, heights = c(1, 1), 
                  highlight=c(2, 4, 6), col.est=c("black","royalblue"), titleTXT = titletxt, 
                  col.est2=c("grey60","lightskyblue"), cex = c(1, 0.8), leftmargin = 12)
    ticks <- seq(-3, 10, 1)
    axis(side=2, at=ticks, las = 2, tck=-0.025)
  }
}


### build the PubMed regression dataset for plotting
#Coefficients
coef_basic <- NULL
for (ests in 1:3) {
  coef_basic <- rbind(coef_basic, fe2_1k$coefficients[[ests]])
}

coef_match <- NULL
for (reg in c("mm_min_f", "mm_maj_f", "mm_all_f")) {
  coef_match <- rbind(coef_match, get(reg)$coefficients[[1]])
}

coef <- rbind(matrix(coef_basic[1]), matrix(coef_match[1]), matrix(coef_basic[2]), matrix(coef_match[2]),
              matrix(coef_basic[3]), matrix(coef_match[3]))

#95% CIs
ci_95_basic <- NULL
for (ests in 1:3) {
  ci_95_basic <- rbind(ci_95_basic, unname(unlist(confint(fe2_1k)[ests,])))
}

ci_95_match <- NULL
for (reg in c("mm_min_f", "mm_maj_f", "mm_all_f")) {
  ci_95_match <- rbind(ci_95_match, unname(unlist(confint(get(reg))[1,])))
}

ci_95 <- rbind(ci_95_basic[1,], ci_95_match[1,], ci_95_basic[2,], ci_95_match[2,], ci_95_basic[3,], ci_95_match[3,])

#99% CIs
ci_99_basic <- NULL
for (ests in 1:3) {
  ci_99_basic <- rbind(ci_99_basic, unname(unlist(confint(fe2_1k, level = 0.99)[ests,])))
}

ci_99_match <- NULL
for (reg in c("mm_min_f", "mm_maj_f", "mm_all_f")) {
  ci_99_match <- rbind(ci_99_match, unname(unlist(confint(get(reg), level = 0.99)[1,])))
}

ci_99 <- rbind(ci_99_basic[1,], ci_99_match[1,], ci_99_basic[2,], ci_99_match[2,], ci_99_basic[3,], ci_99_match[3,])

#FE/matching indicators
yr_teamsize <- matrix(rep(T, 6))
yr_journal <- matrix(rep(T, 6))
yr_mesh <- matrix(rep(rep(c(F, T), c(1, 1)), 3))
# strata_jn <- matrix(rep(rep(c(F, T, F), c(1, 2, 1)), 3))
strata_m1 <- matrix(rep(rep(c(F, T), c(1, 1)), 3))

fes <- cbind(yr_teamsize, yr_journal, yr_mesh, strata_m1)

#Estimate indicators
fmin <- matrix(rep(c(T, F), c(2, 4)))
fmaj <- matrix(rep(c(F, T, F), c(2, 2, 2)))
fall <- matrix(rep(c(F, T), c(4, 2)))
vars <- cbind(fmin, fmaj, fall)

#Prepare the final data
pubmed_reg_dat <- data.frame(cbind(coef, ci_95, ci_99, vars, fes))
for (i in 6:12) {
  pubmed_reg[i] <- as.logical(pubmed_reg[, i])
}

colnames(pubmed_reg) <- c("coef", "ci95_l", "co95_h", "ci99_l", "ci99_h", "fmin", "fmaj", "fall", 
                          "Year_Teamsize", "Year_Journal", "Year_Mesh", "mesh_strata")


####################  Figure 4   ###################

### The coefficient Plot
par(mfrow = c(1,2), oma=c(8,4,2,1), xpd=F)
par(mar=c(0,3,5,0))
chart("m_all", "mm_all_min_f", "mm_all_maj_f", "mm_all_all_f", 
      titletxt = "(A) Patents")

schart_no_dot(pubmed_reg_dat, n = 2, index.est = 1, index.se = NA, index.ci = 2:5, 
              ylim = c(-1, 6.5), adj=c(1,1), offset=c(1.5, 1), ylab="", 
              pch.dot=c(21,21,21,21), col.dot=c("black","grey","white","black"),
              bg.dot=c("black","white","white","black"), lwd.symbol=2, heights = c(1, 1), 
              highlight=c(2, 4, 6), col.est=c("black","#03A9F4"), titleTXT = "(B) Science", 
              col.est2=c("grey60","lightskyblue1"), cex = c(1, 0.8), leftmargin = 12)
ticks <- seq(-1, 6, 1)
axis(side=2, at=ticks, las = 2, tck=-0.025)